4/11/23, 10:41 PM 


In [1]: 


import pandas as pd 


In [2]: 


df = pd.read csv('final.csv') 


In [3]: 


df.head() 


Out[3]: 


serial tempmax 


RAR OO N 


0 34.053151 
1 34.086179 
34.573984 
33.020325 


Bb c N 


30.660976 


5 rows x 26 columns 


In [4]: 


df.tail() 


Out[4]: 


597 
598 
599 
600 
601 


serial tempmax 


597 32.3 
598 32.7 
599 33.0 
600 35.1 
601 34.0 


5 rows x 26 columns 


In [5]; 


tempmin 
24.478082 
25.694309 
25.417886 
25.080488 
24.230894 


tempmin 
24.4 
26.4 
26.3 
26.8 


26.3 
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temp feelslikemax  feelslikemin  feelslike 
28.709863 39.757808 25.317808 32.306301 
29.464228 41.338211 28.140650 34.423577 
29.526829 40.464228 26.560163 33.085366 
28.727642 37.878049 26.193496 31.772358 
26.774797 36.586992 24.263415 28.943902 
temp feelslikemax  feelslikemin  feelslike dew 
28.5 35.9 244 31.2 23.3 
29.3 36.3 26.4 32.5 22.6 
29.8 40.5 26.3 34.5 23.9 
30.6 42.9 29.1 35.1 23.3 
30.2 38.1 26.3 33.1 21.5 


df - df.drop('serial', axis - 1) 
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dew 
22.971233 
23.484553 
22.580488 | 
21.752033 | 
24.214634 


humidity p 
75.0 
68.5 
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In [6]: 


df.shape 
Out[6]: 
(602, 25) 
In [7]: 
df.columns 


Out[7]: 


Index(['tempmax', 'tempmin', 'temp', 'feelslikemax', 'feelslikemin', 


'feelslike', 'dew', 'humidity', 'precip', 'precipprob', 'precipcove 


ro, 
'snow', 'snowdepth', 'windspeed', 'winddir', 'sealevelpressure', 
'cloudcover', 'visibility', 'solarradiation', 'solarenergy', 'uvind 
ex', 
'conditions', 'stations', 'cases', 'labels'], 
dtype-'object') 
In [8]: 


df.duplicated().sum() 


Out[8]: 


0 
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In [9]: 


df.isnull().sum() 


Out[9]: 


tempmax 
tempmin 

temp 
feelslikemax 
feelslikemin 
feelslike 
dew 

humidity 
precip 
precipprob 
precipcover 
snow 
snowdepth 
windspeed 
winddir 
sealevelpressure 
cloudcover 
visibility 
solarradiation 
solarenergy 
uvindex 
conditions 
stations 
cases 

labels 
dtype: int64 
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In [10]: 


df.info() 


<class 'pandas.core.frame.DataFrame'> 


RangeIndex: 602 entries, 0 to 601 
columns): 


Data columns (total 25 


# 


WON Gn n E WM KA G ! 


24 


Column 
tempmax 
tempmin 

temp 
feelslikemax 
feelslikemin 
feelslike 
dew 

humidity 
precip 
precipprob 
precipcover 
snow 
snowdepth 
windspeed 
winddir 
sealevelpressure 
cloudcover 
visibility 
solarradiation 
solarenergy 
uvindex 
conditions 
stations 
cases 

labels 


Non-Null Count 


602 


non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
non-null 
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float64 
float64 
float64 
float64 
float64 
float64 
float64 
float64 
float64 
float64 
float64 
int 64 

int64 

float64 
float64 
float64 
float64 
float64 
float64 
float64 
float64 
float64 
float64 
int 64 

object 


dtypes: float64(21), int64(3), object(1) 


memory usage: 
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In [11]: 


df.describe() 


Out[11]: 


tempmax tempmin temp feelslikemax feelslikemin feelslike dev 

count 602.000000 602.000000 602.000000 602.000000  602.000000 602.000000 602.00000( 
mean 31.918079 24.588318 27.813181 38.476069 25.613154 31.485111 23.98434¢ 
std 2.737215 2.727919 2.412416 4.776400 4.178797 4.672951 2.668014 
min 25.000000 12.740000 18.820000 25.000000 12.360000 . 18.626667 4.48000( 
25% 30.025000 23.200000 26.500000 35.600000 23.200000 28.100000 23.30000C 
50% 31.700000 25.000000 27.900000 38.400000 25.000000 | 31.500000  24.52926€ 
75% 33.600000 26.500000 29.448171 41.900000 26.600000 34.875000  25.4750(X 
max 41.200000 29.400000 33.300000 49.600000 37.900000 42.900000 28.10000C 


8 rows x 24 columns 


In [12]: 


df.nunique() 


Qut[12]: 

tempmax 118 
tempmin 126 
temp 125 
feelslikemax 192 
feelslikemin 157 
feelslike 202 
dew 123 
humidity 295 
precip 207 
precipprob 17 
precipcover 35 
Snow 1 
snowdepth 1 
windspeed 165 
winddir 539 
sealevelpressure 221 
cloudcover 408 
visibility 77 
solarradiation 523 
solarenergy 196 
uvindex 24 
conditions 20 
stations 17 
cases 578 
labels 1 


dtype: int64 
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In [13]: 


for i in range(df.shape[0]): 
if df.loc[i,'cases']>19000: 
df.loc[i,'labels']='Severe Risk' 
elif df.loc[i,'cases']>10000: 
df.loc[i,'labels']='High Risk' 
elif df.loc[i,'cases']>5000: 
df.loc[i,'labels']='Moderate Risk' 
elif df.loc[i,'cases']>1000: 
df.loc[i,'labels']='Low Risk' 
else: 
df.loc[i,'labels']='Minimal to No risk' 


In [14]: 


df.nunique() 


Out[14]: 

tempmax 118 
tempmin 126 
temp 125 
feelslikemax 192 
feelslikemin 157 
feelslike 202 
dew 123 
humidity 295 
precip 207 
precipprob 17 
precipcover 35 
snow 1 
snowdepth 1 
windspeed 165 
winddir 539 
sealevelpressure 221 
cloudcover 498 
visibility 77 
solarradiation 523 
solarenergy 196 
uvindex 24 
conditions 20 
stations 17 
cases 578 
labels 5 


dtype: int64 
In [15]: 


import matplotlib.pyplot as plt 
import seaborn as sns 
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In [16]: 


import warnings 
warnings.filterwarnings('ignore') 


In [17]: 


import numpy as np 


In [18]: 


df['labels'].unique() 


Out[18]: 

array(['Low Risk', 'Moderate Risk', 'High Risk', 'Minimal to No risk', 
'Severe Risk'], dtype=object) 

In [19]: 


df['labels'].value counts() 


Out[19]: 

Moderate Risk 173 
High Risk 154 
Minimal to No risk 124 
Low Risk 87 
Severe Risk 64 


Name: labels, dtype: int64 


In [20]: 


plt.figure(figsize=(15,6)) 
sns.countplot(df['labels'], data = df, palette = 'hls') 
plt.show() 


Low Risk Moderate Risk High Risk Minimal to No risk Severe Risk 
labels 
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In [21]: 


plt.figure(figsize=(30,20)) 


plt.pie(df['labels'].value counts(), labels-df['labels'].value counts().index, autopct= 


'color': 'black', 
'weight': 'bold', 
'family': 'serif' }) 
hfont = ('fontname':'serif', 'weight': 'bold'j 
plt.title('Labels', size-20, **hfont) 
plt.show() 


4 


Labels 


Moderate Risk 


High Risk 


Severe Risk 


Minimal to No risk 
Low Risk 
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In [22]: 


import plotly.express as px 


fig = px.bar(df, x="labels", y= df.index) 


fig.show() 


60k 


50k 


40k 


index 


30k 


20k 
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In [23]: 


value counts = df['labels'].value_counts() 
fig = px.pie(names=value counts.index, values=value counts.values) 
fig.update layout( 
title='Pie Chart of Labels', 
title x=0.5 
) 
fig.show() 


Pie Chart of Labels 
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In [24]: 
for i in df.columns: 
if i != 'labels': 
plt.figure(figsize=(15,6)) 
sns.histplot(df[i], kde = True, bins = 20, palette = 'hls') 
plt.xticks(rotation = 90) 
plt.show() 


In [25]: 


for i in df.columns: 
if i != 'labels': 
plt.figure(figsize=(15,6)) 
sns.distplot(df[i], kde = True, bins = 20) 
plt.xticks(rotation = 90) 
plt.show() 


Density 
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In [26]: 
for i in df.columns: 
if i != 'labels': 
plt.figure(figsize=(15,6)) 
sns.boxplot(df[i], data = df, palette = 'hls') 
plt.xticks(rotation = 90) 
plt.show() 
PN 
tempmax 
v 
In [27]: 
for i in df.columns: 
if i != 'labels': 
plt.figure(figsize=(15,6)) 
sns.violinplot(df[i], data = df, palette = 'hls') 
plt.xticks(rotation = 90) 
plt.show() 
^ 
9 5 å S Ñ A E $ 
tempmax 
y Y S 
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In [28]: 


for i in df.columns: 
if i != 'labels': 
fig = px.histogram(df[i], nbins=20, color discrete sequence=['#636EFA'], 
marginal='box', opacity=0.7) 
fig.update_layout( 
title=f'Histogram of {i}', 
xaxis_title=i, 
yaxis title-'Count', 
showlegend-False 


) 
fig.show() 
^ 
Histogram of tempmax B 
100 
80 
60 
Ww 
Cc 
= 
o 
O v 


localhost:8888/notebooks/dengue.ipynb 13/24 


4/11/23, 10:41 PM dengue - Jupyter Notebook 


In [29]: 


for i in df.columns: 
if i != 'labels': 
fig = 
template='plotly_white', width=800, height=400, 
marginal='rug', barmode='overlay', 
hover_data=[df['labels']]) 


fig.update layout(xaxis=dict(type='category', title=i, 
tickangle=-90, tickfont=dict(size=10)), 


yaxis_title='Frequency', showlegend=False) 
fig.show() 


Histogram of tempmax 
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px.histogram(df, x=i, color discrete sequence=['#636EFA'], nbins=20, opaci 
title=f'Histogram of {i}', labels={'x': i, 'count': 'Frequenc 
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In [30]: 
for i in df.columns: 
if i != 'labels': 
fig = px.box(df, x=df[i], color discrete sequence=['#636EFA']) 
fig.update_layout( 
title='Box Plot of ' + i, 
xaxis_title=i, 
yaxis_title='Value', 
) 
fig.show() 


Box Plot of tempmax 


v 
3 
S 
y 
In [31]: 
for i in df.columns: 
if i != 'labels': 
fig = px.violin(df, y=i, x='labels', box=True, points="all", hover_data=df.colum 
fig.update layout(title=i, xaxis title="Labels", yaxis title=i) 
fig.show() 
^ 
tempmax E 
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40 ° 
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In [32]: 


for i in df.columns: 
if i != 'labels': 
plt.figure(figsize=(15,6)) 
sns.barplot(x = df['labels'], y = df[i], data = df, ci = None, palette = 'hls') 
plt.xticks(rotation = 90) 
plt.show() 


x 
4 
[3 
z 
3 


Moderate Risk 
High Risk 

Minimal to No risk 
Severe Risk 


labels 


bi 
4 
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In [33]: 


for i in df.columns: 
if i != 'labels': 
fig = px.bar(df, x="labels", y=i, color="labels", barmode="group") 
fig.update layout ( 
title=f"{i} by Labels", 
xaxis_title="Labels", 
yaxis_title=i, 
legend_title="Labels" 


) 
fig.show() 


tempmax by Labels 
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tempmax 
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In [34]: 


for i in df.columns: 
for j in df.columns: 
if i !- j: 
if i !- 'labels': 
plt.figure(figsize-(15,6)) 
sns.lineplot(x = df[i], y = df[j], data = df, ci = None, palette = 'hls' 
plt.xticks(rotation = 90) 
plt.show() 


tempmin 


tempmax 


localhost:8888/notebooks/dengue.ipynb 18/24 


dengue - Jupyter Notebook 
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In [35]: 


for i in df.columns: 


for j in df.columns: 


j: 


if i ! 


if i 


"labels': 


plt.figure(figsize=(15,6)) 


sns.scatterplot(x 
plt.xticks(rotation 


plt.show() 


'h 


None, palette - 


df, ci 


- df[j], data - 


df[i], y 


90) 
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ev 
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In [37]: 


\plt.figure(figsize=(15,6)) 
'sns.pairplot(data = df, palette = 'hls') 
\plt.xticks(rotation = 90) 

“plt.show() 


<Figure size 1080x432 with 0 Axes> 
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In [38]: 


df corr = df.corr() 
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In [39]: 
df corr 
Out[39]: 
tempmax tempmin temp feelslikemax feelslikemin  feelslike ( 
tempmax 1.000000 0.527246 0.820941 0.832811 0.526838 0.732782 0.228 


tempmin 0.527246 1.000000 0.881988 0.716619 0.931412 0.883030 0.784 
temp 0.820941 0.881988 1.000000 0.863626 0.829122 0.949866 0.621 
feelslikemax 0.832811 0.716619 0.863626 1.000000 0.693604 0.891305 0.596 
feelslikemin 0.526838 0.931412 0.829122 0.693604 1.000000 0.883670 0.664 
feelslike 0.732782 0.883030 0.949866 0.891305 0.883670 1.000000 0.671 
dew 0.228436 0.784664 0.621103 0.596466 0.664503 0.671107 1.000 
humidity -0.602729 -0.022102 -0.335037 -0.219550 -0.112346 -0.231987 0.525 
precip -0.270202 -0.015915 -0.130827 -0.155520 -0.099895 -0.138739 0.155 
precipprob -0.226290 0.043788 -0.077521 -0.061133 -0.097888 -0.093543 0.337 
precipcover -0.139013 -0.047444 -0.118306 -0.083107 -0.131179 -0.150176 0.108 
snow NaN NaN NaN NaN NaN NaN f 
snowdepth NaN NaN NaN NaN NaN NaN f 
windspeed 0.234752 0.502966 0.451224 0.254755 0.460448 0.448998 0.255 
winddir 0.131311 0.130978 0.151874 0.096730 0.087622 0.114765 0.053 
sealevelpressure 0.011288 0.007517 0.011662 0.011925 0.002738 0.005758 0.010 
cloudcover -0.237572 0.223664 0.002212 0.034238 0.115816 0.021153 0.557 
visibility 0.358248 0.225410 0.299479 0.245154 0.235159 0.273086 -0.012 
solarradiation 0.239410 -0.034087 0.114995 0.167350 -0.007501 0.096916 -0.060 
solarenergy 0.239365 -0.034765 0.114794 0.167075 -0.008587 0.096444 -0.060 
uvindex 0.198627 -0.075002 0.056545 0.115035 -0.038326 0.042182 -0.109 
conditions -0.184010 -0.115677 -0.157189 -0.094710 -0.218707 -0.209585 0.159 
stations 0.381730 0.388823 0.421828 0.295325 0.382937 0.376258 0.175 
cases 0.284181 0.364691 0.378149 0.304087 0.376015 0.415344 0.179 


24 rows x 24 columns 
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In [41]: 


plt.figure(figsize=(30, 10)) 
matrix = np.triu(df_corr) 


sns.heatmap(df_corr, annot=True, linewidth=.8, mask=matrix, cmap="rocket"); 
plt.show() 


solarradiation 


solarenergy 


snow - 


snowdepth - 


In [44]: 


from sklearn.preprocessing import LabelEncoder 
le=LabelEncoder() 


df[ 'labels']=le.fit transform(df['labels']) 
X=df.iloc[:,:-1] 
y=df['labels'] 
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In [45]: 


X 


Out[45]: 


597 
598 
599 
600 
601 


tempmax 
34.053151 
34.086179 
34.573984 
33.020325 
30.660976 


32.300000 
32.700000 
33.000000 
35.100000 
34.000000 


tempmin 
24.478082 
25.694309 
25.417886 
25.080488 
24.230894 


24.400000 
26.400000 
26.300000 
26.800000 
26.300000 


602 rows x 24 columns 


In [46]: 


temp feelslikemax 


28.709863 
29.464228 
29.526829 
28.727642 
26.774797 


28.500000 
29.300000 
29.800000 
30.600000 
30.200000 


39.757808 
41.338211 
40.464228 
37.878049 
36.586992 


35.900000 
36.300000 
40.500000 
42.900000 
38.100000 
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feelslikemin 
25.317808 
28.140650 
26.560163 
26.193496 
24.263415 


24.400000 
26.400000 
26.300000 
29.100000 
26.300000 


from sklearn.model selection import train test split 


In [47]: 


X train, X test, y train, y test = train test split(X, y, test_size=0.2, stratify = df[' 
random state=42) 


In [48]: 


from sklearn.ensemble import RandomForestRegressor 


In [49]: 


reg=RandomForestRegressor() 
reg.fit(X_train,y_train) 


Out[49]: 


iRandomFores Regressor(). 
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feelslike 
32.306301 
34.423577 
33.085366 
31.772358 
28.943902 


31.200000 
32.500000 
34.500000 
35.100000 
33.100000 


dew 
22.971233 
23.484553 
22.580488 
21.752033 
24.214634 


23.300000 
22.600000 
23.900000 
23.300000 
21.500000 
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In [50]: 


y pred-reg.predict(X test) 


In [53]: 
reg.score(X test,y test) 
Out [53]: 
0.9943796068075117 


In [54]: 


from sklearn.model_selection import cross_val_score 
acc=cross val score(reg,X test,y test,cv=5).mean() 
print(acc) 


0.9404900487112752 
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